##########################################
# Replication Data for Proksch, Lowe, Wäckerle, Soroka. (2018). Multilingual Sentiment Analysis: A New Approach to Measuring Conflict in Legislative Speeches. Legislative Studies Quarterly, Forthcoming.
##########################################

#Part 3: Wordscores Replication
#Most of this code follows the replication provided by Herzog and Benoit for the article "The Most Unkindest Cuts: Speaker Selection and Expressed Government Dissent During Economic Crisis"
#This code produces the plots of average Wordscores positions and average sentiment over time by speaker group

# Start from a clean workspace: remove every object, including hidden
# (dot-prefixed) ones.
rm(list = ls(all.names = TRUE))
library(rstudioapi)
library(ggplot2)

# Make the folder containing this script the working directory so that the
# relative load() paths used below resolve.
# The active-document lookup only works inside RStudio. When RStudio is not
# available (e.g. Rscript), or the document is unsaved and has an empty path,
# the current working directory is left unchanged instead of raising an error.
current_path <- if (isAvailable()) getActiveDocumentContext()$path else ""
if (isTRUE(nzchar(current_path))) {
  setwd(dirname(current_path))
}


### DATA ############################## ##############################
# WORDSCORES
######################################## ##############################

### DATA ##############################
# Load the saved workspace; the code below relies on it providing `dataSub`.
load("3_working_data_speakers.RData")
########################################
data <- dataSub

#Plot Sentiment vs Wordscores
# Scatter plot with the `textscore` column on the x axis and the `Sentiment`
# column on the y axis, one point per row of `data`.
p <- ggplot(data, aes(x = textscore, y = Sentiment)) +
  geom_point() +
  theme_bw() +
  theme(
    panel.grid.minor = element_blank(),
    axis.text = element_text(size = 18, colour = "black"),
    axis.title = element_text(size = 18),
    legend.background = element_rect(
      color = "black", fill = "white", size = 0.5, linetype = "solid"
    ),
    legend.text = element_text(size = 14),
    legend.title = element_blank(),
    legend.position = "bottom"
  ) +
  labs(x = "Wordscores Estimates", y = "Sentiment Estimates")

# Render the scatter plot now: `p` is reassigned by the mean-positions plot
# later in the script, so without an explicit print it would never be drawn.
print(p)

# Variables
# ---------
# election years
# NOTE(review): values look like fractional calendar years; this vector is not
# referenced again in the visible part of the script - confirm before removing.
election.year <- c(1987.2, 1989.5, 1992.9, 1997.5, 2002.3, 2007.3, 2011.1)


# Plot of average positions over time
# -----------------------------------
# group
# Collapse `position` into three numeric codes; rows whose position matches
# none of the values below keep NA.
data$group <- NA
data$group[data$position == "Opposition"] <- 1
data$group[data$position == "Govt backbencher"] <- 2
data$group[data$position %in%
             c("Taoiseach", "Tánaiste", "Minister", "Minister of State")] <- 3

# Pin the levels explicitly so each label stays tied to its code. Without
# `levels`, factor() derives them from the codes present in the data and
# raises an "invalid 'labels'" error if any of the three groups is absent.
data$group <- factor(
  data$group,
  levels = 1:3,
  labels = c("Opposition", "Backbench", "Cabinet")
)

# construct data set
# One summary table per statistic: `textscore` aggregated within every
# debate date x group combination, with the columns renamed on the fly.
avgPos <- setNames(
  aggregate(data$textscore,
            by = list(data$debate_dateStart, data$group), FUN = mean),
  c("debate_dateStart", "group", "mean")
)

avgMin <- setNames(
  aggregate(data$textscore,
            by = list(data$debate_dateStart, data$group), FUN = min),
  c("debate_dateStart", "group", "min")
)

avgMax <- setNames(
  aggregate(data$textscore,
            by = list(data$debate_dateStart, data$group), FUN = max),
  c("debate_dateStart", "group", "max")
)

avgSd <- setNames(
  aggregate(data$textscore,
            by = list(data$debate_dateStart, data$group), FUN = sd),
  c("debate_dateStart", "group", "sd")
)

# Join the four tables left to right on their shared columns
# (debate_dateStart and group).
sumStats <- Reduce(merge, list(avgPos, avgMin, avgMax, avgSd))

# Error-bar aesthetics: one standard deviation either side of the mean.
limits <- aes(ymin = mean - sd, ymax = mean + sd)

# Count the debate dates on which the Backbench mean lies strictly closer to
# the Opposition mean than to the Cabinet mean.
dates <- unique(sumStats$debate_dateStart)

# Mean of one group aligned to `on_dates`; NA where that group has no row for
# a date.
mean_on_dates <- function(stats, group_name, on_dates) {
  rows <- which(stats$group == group_name)
  stats$mean[rows][match(on_dates, stats$debate_dateStart[rows])]
}

backbench_mean <- mean_on_dates(sumStats, "Backbench", dates)
dist_to_cabinet <- abs(backbench_mean - mean_on_dates(sumStats, "Cabinet", dates))
dist_to_oppo <- abs(backbench_mean - mean_on_dates(sumStats, "Opposition", dates))

# Dates lacking any of the three groups give NA and are skipped, rather than
# aborting the script with a zero-length `if` condition.
ws_backbench_closer_to_oppo <- sum(dist_to_cabinet > dist_to_oppo, na.rm = TRUE)
ws_backbench_closer_to_oppo

# plot mean positions
# One point and line per group across debate dates, with a loess smoother per
# group, vertical rules at each distinct `govtStart` date, and text labels
# naming the government periods along the top (y = 0.5).
p <- ggplot(data = sumStats,
            aes(x = debate_dateStart, y = mean, group = group,
                color = group, shape = group, lty = group)) +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line = element_line(colour = "black"),
        axis.text.y = element_text(size = 16),
        axis.text.x = element_text(size = 16),
        legend.text = element_text(size = 16),
        axis.title = element_text(size = 18),
        plot.title = element_text(size = 20, hjust = 0.5)) +
  theme(legend.position = "bottom",
        legend.title = element_blank()) +
  geom_point(size = 4) +
  geom_line(linetype = 1, alpha = 0.5, size = 1) +
  geom_smooth(colour = "grey55", lty = "solid", size = 1, alpha = 0.1,
              method = 'loess') +
  xlab("") +
  ylab("Average position") +
  guides(color = guide_legend(title = "Group:")) +
  # Tick at every debate date, labelled with the sorted unique budget years;
  # this requires as many distinct budget years as distinct debate dates.
  scale_x_date(breaks = sort(unique(sumStats$debate_dateStart)),
               labels = sort(unique(data$budget_year))) +
  theme(axis.text.x = element_text(angle = 30, vjust = -0.05)) +
  scale_y_continuous(breaks = seq(-0.5, 0.5, 0.1)) +
  theme(legend.position = "bottom") +
  # The stray `dcolor` argument was an unknown parameter that ggplot2 ignored;
  # the grey colour is the one that was actually applied, so it is kept.
  geom_vline(xintercept = as.numeric(sort(unique(as.Date(data$govtStart)))),
             linetype = "solid", colour = "grey") +
  annotate("text", x = as.Date("1988-05-15"), y = 0.5, label = "FF Minority\n", size = 4) +
  annotate("text", x = as.Date("1992-04-01"), y = 0.5, label = "FF Coalitions\n", size = 4) +
  annotate("text", x = as.Date("1996-04-01"), y = 0.5, label = "FG-Lab-DL\nCoalition", size = 4) +
  annotate("text", x = as.Date("2002-09-01"), y = 0.5, label = "FF-led Boom Years\n", size = 4) +
  annotate("text", x = as.Date("2009-10-01"), y = 0.5, label = "FF-Greens\nCrisis Years", size = 4) +
  annotate("text", x = as.Date("2012-08-01"), y = 0.5, label = "FG-Lab\nCrisis Years", size = 4)

# Render the Wordscores plot here: `p` is reassigned by the sentiment plot
# further down, so it would otherwise never be drawn.
print(p)


### DATA ############################## ##############################
# SENTIMENT
######################################## ##############################

### DATA ##############################
# Load the saved workspace; the code below relies on it providing
# `dataSub_senti`.
load("3_working_data_speakers_senti.RData")
########################################
data <- dataSub_senti

# Variables
# ---------
# election years
# NOTE(review): values look like fractional calendar years; this vector is not
# referenced again in the visible part of the script - confirm before removing.
election.year <- c(1987.2, 1989.5, 1992.9, 1997.5, 2002.3, 2007.3, 2011.1)


# Plot of average positions over time
# -----------------------------------
# group
# Collapse `position` into numeric codes; rows matching none of the rules
# below keep NA.
data$group <- NA
data$group[data$position == "Opposition"] <- 1
data$group[data$position == "Govt backbencher"] <- 2
data$group[data$position %in%
             c("Taoiseach", "Tánaiste", "Minister", "Minister of State")] <- 3
# Assigned last, so these two flags override the position-based codes above.
data$group[data$finance_minister == TRUE] <- 4
data$group[data$opposition_spokesperson == TRUE] <- 5

# Pin the levels explicitly so each label stays tied to its code. Without
# `levels`, factor() derives them from the codes present in the data and
# raises an "invalid 'labels'" error if any of the five groups is absent.
data$group <- factor(
  data$group,
  levels = 1:5,
  labels = c("Opposition", "Backbench", "Cabinet",
             "Finance Minister", "Opposition Spokesperson")
)

# construct data set
# One summary table per statistic: `Sentiment` aggregated within every
# debate date x group combination, with the columns renamed on the fly.
avgPos <- setNames(
  aggregate(data$Sentiment,
            by = list(data$debate_dateStart, data$group), FUN = mean),
  c("debate_dateStart", "group", "mean")
)

avgMin <- setNames(
  aggregate(data$Sentiment,
            by = list(data$debate_dateStart, data$group), FUN = min),
  c("debate_dateStart", "group", "min")
)

avgMax <- setNames(
  aggregate(data$Sentiment,
            by = list(data$debate_dateStart, data$group), FUN = max),
  c("debate_dateStart", "group", "max")
)

avgSd <- setNames(
  aggregate(data$Sentiment,
            by = list(data$debate_dateStart, data$group), FUN = sd),
  c("debate_dateStart", "group", "sd")
)

# Join the four tables left to right on their shared columns
# (debate_dateStart and group).
sumStats <- Reduce(merge, list(avgPos, avgMin, avgMax, avgSd))

# Error-bar aesthetics: one standard deviation either side of the mean.
limits <- aes(ymin = mean - sd, ymax = mean + sd)

# Count the debate dates on which the Backbench mean sentiment lies strictly
# closer to the Opposition mean than to the Cabinet mean.
dates <- unique(sumStats$debate_dateStart)

# Mean of one group aligned to `on_dates`; NA where that group has no row for
# a date.
mean_on_dates <- function(stats, group_name, on_dates) {
  rows <- which(stats$group == group_name)
  stats$mean[rows][match(on_dates, stats$debate_dateStart[rows])]
}

backbench_mean <- mean_on_dates(sumStats, "Backbench", dates)
dist_to_cabinet <- abs(backbench_mean - mean_on_dates(sumStats, "Cabinet", dates))
dist_to_oppo <- abs(backbench_mean - mean_on_dates(sumStats, "Opposition", dates))

# Dates lacking any of the three groups give NA and are skipped, rather than
# aborting the script with a zero-length `if` condition.
senti_backbench_closer_to_oppo <- sum(dist_to_cabinet > dist_to_oppo, na.rm = TRUE)
senti_backbench_closer_to_oppo


# plot mean positions
# One point and line per group across debate dates, with a loess smoother per
# group, vertical rules at each distinct `govtStart` date, and text labels
# naming the government periods along the top (y = 2).
p <- ggplot(data = sumStats,
            aes(x = debate_dateStart, y = mean, group = group,
                color = group, shape = group, lty = group)) +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line = element_line(colour = "black"),
        axis.text.y = element_text(size = 16),
        axis.text.x = element_text(size = 16),
        legend.text = element_text(size = 16),
        axis.title = element_text(size = 18),
        plot.title = element_text(size = 20, hjust = 0.5)) +
  theme(legend.position = "bottom",
        legend.title = element_blank()) +
  geom_point(size = 4) +
  geom_line(linetype = 1, alpha = 0.5, size = 1) +
  geom_smooth(colour = "grey55", size = 1, alpha = 0.1, method = 'loess') +
  xlab("") +
  ylab("Average sentiment") +
  # Tick at every debate date, labelled with the sorted unique budget years;
  # this requires as many distinct budget years as distinct debate dates.
  scale_x_date(breaks = sort(unique(sumStats$debate_dateStart)),
               labels = sort(unique(data$budget_year))) +
  theme(axis.text.x = element_text(angle = 30, vjust = -0.05)) +
  # The stray `dcolor` argument was an unknown parameter that ggplot2 ignored;
  # the grey colour is the one that was actually applied, so it is kept.
  geom_vline(xintercept = as.numeric(sort(unique(as.Date(data$govtStart)))),
             linetype = "solid", colour = "grey") +
  annotate("text", x = as.Date("1988-05-15"), y = 2, label = "FF Minority\n", size = 4) +
  annotate("text", x = as.Date("1992-04-01"), y = 2, label = "FF Coalitions\n", size = 4) +
  annotate("text", x = as.Date("1996-04-01"), y = 2, label = "FG-Lab-DL\nCoalition", size = 4) +
  annotate("text", x = as.Date("2002-09-01"), y = 2, label = "FF-led Boom Years\n", size = 4) +
  annotate("text", x = as.Date("2009-10-01"), y = 2, label = "FF-Greens\nCrisis Years", size = 4) +
  annotate("text", x = as.Date("2012-08-01"), y = 2, label = "FG-Lab\nCrisis Years", size = 4)

# Explicit print so the plot is also drawn when the script is run with
# source(), where a bare `p` is not auto-printed.
print(p)
